{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from funcs import *\n",
    "from pandas.tseries.offsets import *\n",
    "import sys\n",
    "reload(sys)\n",
    "sys.setdefaultencoding('utf8')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 统计季度交易特征"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "indexs = ['订单总数','订单时间间隔均值（天）','订单时间间隔方差（天）','交易总量','交易量最大值','交易量最小值','交易量方差',\n",
    "          '交易量均值','交易额总量','交易额最大值','交易额最小值','交易额方差','交易额均值','交易稳定性',\n",
    "          '账期订单占比','账期订单交易量均值','账期订单交易额均值']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def stop_time(start):\n",
    "    stop = start - DateOffset(months=3, days=start.day)\n",
    "    return stop"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def ac_feature(trade):\n",
    "    feature = []\n",
    "    stop = stop_time(trade[u'创建时间'].iloc[0])\n",
    "    trade_3m = trade[trade[u'创建时间']>stop]\n",
    "    if trade_3m.shape[0]>0:               \n",
    "        num = trade_3m.shape[0] #订单数\n",
    "        feature.append(num)\n",
    "        tim_st = time_gap(trade_3m[u'创建时间'])\n",
    "        feature.append(tim_st[0]) #均值\n",
    "        feature.append(tim_st[1]) #方差\n",
    "        \n",
    "        t_num = trade_3m[u'销售数量（吨）'].agg(['sum','max','min','std','mean'])\n",
    "        t_money = trade_3m[u'销售金额（元）'].agg(['sum','max','min','std','mean'])\n",
    "        feature.extend(t_num)\n",
    "        feature.extend(t_money)     \n",
    "        feature.append(trade_stab_all(trade_3m))\n",
    "    else:#近三个月无订单\n",
    "        feature.extend([0]*14) \n",
    "        \n",
    "    zhangqi = trade_3m[trade_3m[u'是否账期']==u'是']\n",
    "    if zhangqi.shape[0]>0:\n",
    "        feature.append(zhangqi.shape[0]*1.0/trade_3m.shape[0])\n",
    "        feature.append(zhangqi[u'销售数量（吨）'].mean())\n",
    "        feature.append(zhangqi[u'销售金额（元）'].mean())\n",
    "    else:\n",
    "        feature.append(0)\n",
    "        feature.append(0)\n",
    "        feature.append(0)\n",
    "    return feature"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def df_sub(record,trade):\n",
    "    df = pd.DataFrame(index=indexs)    \n",
    "    tmps = zip(record[u'应收单号'],record[u'起算日期'])\n",
    "    for t in tmps:\n",
    "        trade_sub = trade[trade[u'创建时间']<t[1]]         \n",
    "        if trade_sub.shape[0]>1:#有历史订单\n",
    "            fs = ac_feature(trade_sub.iloc[1:,:])\n",
    "        else:#新客户\n",
    "            fs = [np.nan]*len(indexs)\n",
    "        df[t[0]]=fs\n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "zq_files = os.listdir('./data/loan_list/')\n",
    "files = VisitDir('./data/client_data/')\n",
    "files =[f for f in files if '交易记录' in f]\n",
    "\n",
    "samples = pd.DataFrame()\n",
    "for f in zq_files:\n",
    "    record = pd.read_excel(os.path.join('./data/loan_list/',f))\n",
    "    record = record.iloc[1:,:]  #账期数据\n",
    "    for f1 in files:\n",
    "        if f1.split('/')[-2]==f.split('.')[0]:\n",
    "            trade = pd.read_excel(f1)   #交易数据\n",
    "            trade = trade.dropna(thresh=5)\n",
    "            df = df_sub(record,trade)          \n",
    "            if samples.shape[0]==0:\n",
    "                samples=df\n",
    "            else:\n",
    "                samples = samples.merge(df,left_index=True,right_index=True,how='left')\n",
    "            continue\n",
    "\n",
    "samples =samples.T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(1430, 17)"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "samples.to_csv('./new_out/3m_feature_trade.csv')\n",
    "samples.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 季度逾期统计"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style>\n",
       "    .dataframe thead tr:only-child th {\n",
       "        text-align: right;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>业务类型</th>\n",
       "      <th>客户</th>\n",
       "      <th>已收金额</th>\n",
       "      <th>应收</th>\n",
       "      <th>应收余额</th>\n",
       "      <th>应收单号</th>\n",
       "      <th>应收总额</th>\n",
       "      <th>应收日期</th>\n",
       "      <th>收款状态</th>\n",
       "      <th>账期</th>\n",
       "      <th>起算日期</th>\n",
       "      <th>逾期</th>\n",
       "      <th>销售合同号</th>\n",
       "      <th>zq_day</th>\n",
       "      <th>yq_day</th>\n",
       "      <th>季度逾期占比</th>\n",
       "      <th>季度提前还款占比</th>\n",
       "      <th>季度平均逾期天数</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>账期业务</td>\n",
       "      <td>佛山市南海区松岗明珠塑料有限公司</td>\n",
       "      <td>184520.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>YS20170228152309</td>\n",
       "      <td>184520.0</td>\n",
       "      <td>2017-03-30</td>\n",
       "      <td>已收款</td>\n",
       "      <td>30天</td>\n",
       "      <td>2017-02-28</td>\n",
       "      <td>11天</td>\n",
       "      <td>ZSL20170227LL-C003WJ</td>\n",
       "      <td>30</td>\n",
       "      <td>11</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.333333</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>账期业务</td>\n",
       "      <td>佛山市南海区松岗明珠塑料有限公司</td>\n",
       "      <td>171990.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>YS20170103150373</td>\n",
       "      <td>171990.0</td>\n",
       "      <td>2017-01-13</td>\n",
       "      <td>已收款</td>\n",
       "      <td>10天</td>\n",
       "      <td>2017-01-03</td>\n",
       "      <td>0天</td>\n",
       "      <td>ZSL20161228LL-C041WJ</td>\n",
       "      <td>10</td>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>账期业务</td>\n",
       "      <td>佛山市南海区松岗明珠塑料有限公司</td>\n",
       "      <td>198450.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>YS20161117147341</td>\n",
       "      <td>198450.0</td>\n",
       "      <td>2016-12-02</td>\n",
       "      <td>已收款</td>\n",
       "      <td>15天</td>\n",
       "      <td>2016-11-17</td>\n",
       "      <td>-2天</td>\n",
       "      <td>ZSL20161025LL-C009WJ</td>\n",
       "      <td>15</td>\n",
       "      <td>-2</td>\n",
       "      <td>0.125000</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>账期业务</td>\n",
       "      <td>佛山市南海区松岗明珠塑料有限公司</td>\n",
       "      <td>198450.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>YS20161115147142</td>\n",
       "      <td>198450.0</td>\n",
       "      <td>2016-11-29</td>\n",
       "      <td>已收款</td>\n",
       "      <td>15天</td>\n",
       "      <td>2016-11-14</td>\n",
       "      <td>0天</td>\n",
       "      <td>ZSL20161025LL-C009WJ</td>\n",
       "      <td>15</td>\n",
       "      <td>0</td>\n",
       "      <td>0.142857</td>\n",
       "      <td>0.285714</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>账期业务</td>\n",
       "      <td>佛山市南海区松岗明珠塑料有限公司</td>\n",
       "      <td>186940.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.0</td>\n",
       "      <td>YS20161025145675</td>\n",
       "      <td>186940.0</td>\n",
       "      <td>2016-11-09</td>\n",
       "      <td>已收款</td>\n",
       "      <td>15天</td>\n",
       "      <td>2016-10-25</td>\n",
       "      <td>0天</td>\n",
       "      <td>ZSL20161011LL-C038WJ</td>\n",
       "      <td>15</td>\n",
       "      <td>0</td>\n",
       "      <td>0.125000</td>\n",
       "      <td>0.250000</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   业务类型                客户      已收金额   应收  应收余额              应收单号      应收总额  \\\n",
       "0  账期业务  佛山市南海区松岗明珠塑料有限公司  184520.0  NaN   0.0  YS20170228152309  184520.0   \n",
       "1  账期业务  佛山市南海区松岗明珠塑料有限公司  171990.0  NaN   0.0  YS20170103150373  171990.0   \n",
       "2  账期业务  佛山市南海区松岗明珠塑料有限公司  198450.0  NaN   0.0  YS20161117147341  198450.0   \n",
       "3  账期业务  佛山市南海区松岗明珠塑料有限公司  198450.0  NaN   0.0  YS20161115147142  198450.0   \n",
       "4  账期业务  佛山市南海区松岗明珠塑料有限公司  186940.0  NaN   0.0  YS20161025145675  186940.0   \n",
       "\n",
       "        应收日期 收款状态   账期       起算日期   逾期                 销售合同号  zq_day  yq_day  \\\n",
       "0 2017-03-30  已收款  30天 2017-02-28  11天  ZSL20170227LL-C003WJ      30      11   \n",
       "1 2017-01-13  已收款  10天 2017-01-03   0天  ZSL20161228LL-C041WJ      10       0   \n",
       "2 2016-12-02  已收款  15天 2016-11-17  -2天  ZSL20161025LL-C009WJ      15      -2   \n",
       "3 2016-11-29  已收款  15天 2016-11-14   0天  ZSL20161025LL-C009WJ      15       0   \n",
       "4 2016-11-09  已收款  15天 2016-10-25   0天  ZSL20161011LL-C038WJ      15       0   \n",
       "\n",
       "     季度逾期占比  季度提前还款占比  季度平均逾期天数  \n",
       "0  0.000000  0.333333       0.0  \n",
       "1  0.000000  0.250000       0.0  \n",
       "2  0.125000  0.250000       1.0  \n",
       "3  0.142857  0.285714       1.0  \n",
       "4  0.125000  0.250000       1.0  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "files = os.listdir('./data/loan_list/')\n",
    "loan_all = pd.DataFrame()\n",
    "for f in files:\n",
    "    record = pd.read_excel(os.path.join('./data/loan_list/',f))\n",
    "    loan_all = loan_all.append(record.iloc[1:,:],ignore_index=True)\n",
    "loan_all['zq_day'] = loan_all[u'账期'].apply(lambda x:re.findall('\\d+',x)[0])\n",
    "loan_all['yq_day'] = loan_all[u'逾期'].apply(lambda x:re.findall('^(-\\d+|\\d+)',x)[0])\n",
    "loan_all['zq_day'] = loan_all['zq_day'].astype(int)\n",
    "loan_all['yq_day'] = loan_all['yq_day'].apply(lambda x:int(x))\n",
    "loan_all=loan_all.drop_duplicates()\n",
    "\n",
    "yq_ratio =[]\n",
    "tq_ratio =[]\n",
    "yq_avg =[]\n",
    "loan_all[u'起算日期'] =pd.to_datetime(loan_all[u'起算日期'])\n",
    "for i in xrange(loan_all.shape[0]):\n",
    "    name = loan_all.iat[i,1]\n",
    "    time = loan_all.iat[i,10]\n",
    "    stop = stop_time(time)\n",
    "    loan_sub = loan_all[(loan_all[u'客户'] == name) & (loan_all[u'起算日期']<time) &(loan_all[u'起算日期']>stop)]\n",
    "    if loan_sub.shape[0]>0:\n",
    "        yq_ratio.append(loan_sub[loan_sub['yq_day']>0].shape[0]*1.0 / loan_sub.shape[0])\n",
    "        tq_ratio.append( loan_sub[loan_sub['yq_day']<0].shape[0]*1.0 / loan_sub.shape[0])\n",
    "        if loan_sub[loan_sub['yq_day']>0].shape[0]>0:\n",
    "            yq_avg.append( loan_sub[loan_sub['yq_day']>0]['yq_day'].mean())\n",
    "        else:\n",
    "            yq_avg.append(0)\n",
    "    else:\n",
    "        yq_ratio.append(np.nan)\n",
    "        tq_ratio.append(np.nan)\n",
    "        yq_avg.append(np.nan)\n",
    "\n",
    "loan_all[u'季度逾期占比']=yq_ratio\n",
    "loan_all[u'季度提前还款占比']=tq_ratio\n",
    "loan_all[u'季度平均逾期天数']=yq_avg\n",
    "loan_all.to_csv('./new_out/3m_loan_all.csv',index=None,encoding='utf-8')\n",
    "loan_all.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
